## This file cleans MIT's County Presidential Election Returns, downloaded from: https://doi.org/10.7910/DVN/VOQCHQ ##
## Created by Meredith Dost and last run 8/16/2025 ##
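## The purpose is to create, for every year of analysis, a Democratic vote share variable ##
## equal to the Democratic presidential vote share from the most recent prior presidential election: ##
## 2008 returns -> 2010 and 2012; 2012 returns -> 2014 and 2016; 2016 returns -> 2018 and 2020 ##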

# set working directory
#setwd("voting_data/input_data/")

# load the raw county presidential returns into a data frame
pres <- read.csv(file = "countypres_2000-2020.csv", header = TRUE)

# 2008 returns: Barack Obama's share of the total vote in each county.
# The same shares serve as the lagged value for analysis years 2010 and 2012.
pres08 <- local({
  # which() skips rows where the filter evaluates to NA, just as subset() does
  obama <- pres[which(pres$year == 2008 & pres$candidate == "BARACK OBAMA"), ]
  obama$demshare <- obama$candidatevotes / obama$totalvotes
  shares <- obama[c("county_fips", "demshare")]
  names(shares)[1] <- "fips"
  # keep only rows that carry a county FIPS code
  shares[!is.na(shares$fips), ]
})
# one copy of the table per analysis year
pres10 <- pres08
pres10[["year"]] <- 2010
pres08[["year"]] <- 2012

# 2012 returns: Barack Obama's share of the total vote in each county.
# The same shares serve as the lagged value for analysis years 2014 and 2016.
pres12 <- local({
  # which() skips rows where the filter evaluates to NA, just as subset() does
  obama <- pres[which(pres$year == 2012 & pres$candidate == "BARACK OBAMA"), ]
  obama$demshare <- obama$candidatevotes / obama$totalvotes
  shares <- obama[c("county_fips", "demshare")]
  names(shares)[1] <- "fips"
  # keep only rows that carry a county FIPS code
  shares[!is.na(shares$fips), ]
})
# one copy of the table per analysis year
pres14 <- pres12
pres12[["year"]] <- 2014
pres14[["year"]] <- 2016

# 2016 returns: Hillary Clinton's share of the total vote in each county.
# The same shares serve as the lagged value for analysis years 2018 and 2020.
pres16 <- local({
  # which() skips rows where the filter evaluates to NA, just as subset() does
  clinton <- pres[which(pres$year == 2016 & pres$candidate == "HILLARY CLINTON"), ]
  clinton$demshare <- clinton$candidatevotes / clinton$totalvotes
  shares <- clinton[c("county_fips", "demshare")]
  names(shares)[1] <- "fips"
  # keep only rows that carry a county FIPS code
  shares[!is.na(shares$fips), ]
})
# one copy of the table per analysis year
pres18 <- pres16
pres16[["year"]] <- 2018
pres18[["year"]] <- 2020


### merge ###
# Stack the six per-year tables into one long table, one row per county and
# analysis year. The generic rbind() dispatches to the data-frame method.
dem <- rbind(pres08, pres10, pres12, pres14, pres16, pres18)

#setwd("voting_data/")
# Write the combined table as CSV without row names. FALSE is spelled out
# because the shorthand F is an ordinary variable that can be reassigned.
write.table(dem, "demvoteshare_by_county.csv", sep = ",", row.names = FALSE)

